#include "utility.h"
#include<iostream>

// Count the set bits of a 64-bit word without a lookup table.
// The word is reduced in parallel: first to 2-bit partial counts, then to
// 4-bit and 8-bit ones, after which the eight byte counts are folded together.
int bitCount(uint64 i)
{
	const uint64 oddBits  = 0x5555555555555555;	// every second bit
	const uint64 bitPairs = 0x3333333333333333;	// low two bits of each nibble
	const uint64 nibbles  = 0x0f0f0f0f0f0f0f0f;	// low nibble of each byte

	uint64 acc = i;
	acc -= (acc >> 1) & oddBits;				// 2-bit counts
	acc = (acc & bitPairs) + ((acc >> 2) & bitPairs);	// 4-bit counts
	acc = (acc + (acc >> 4)) & nibbles;			// 8-bit counts

	// Fold the byte counts into the lowest byte.
	acc += acc >> 8;
	acc += acc >> 16;
	acc += acc >> 32;

	// The total is at most 64, so seven bits are enough to hold it.
	const int low = (int)acc;
	return low & 0x7f;
}

// Count the set bits of a 64-bit word by summing the wordbits table entries
// of its four 16-bit chunks. wordbits is filled in by BOOST2 before use.
int popcount(uint64 i)
{
	int total = 0;
	for (int shift = 0; shift < 64; shift += 16)
	{
		total += wordbits[(i >> shift) & 0xFFFF];
	}
	return total;
}

// Measure every data file named in the list file `filename`.
//
// The list file holds one data-file path per line; blank lines are ignored.
// Each non-empty line of a data file is one sample: its first numofCov + 1
// values are single 0/1 characters at the even character positions 0, 2, 4, ...
// (presumably the phenotype followed by the covariates - confirm with the data
// format), and every further digit character is counted as one SNP.
//
// filename  path of the list file
// DataSize  out: *DataSize receives a calloc'ed array of 2 * (return value)
//           ints; entry [2*k] is the number of samples in data file k and
//           entry [2*k + 1] the number of SNPs in it. The caller frees it.
// nY_G      in/out: array with 2^(numofCov + 1) counters. For the first data
//           file only, the leading 0/1 values of each sample are read as a
//           binary number and the counter at that index is incremented.
// numofCov  number of covariate columns
//
// Returns the number of data files. Any unreadable file, an empty list or a
// malformed sample line prints a message and terminates the program with
// exit(1).
int GetDataSize(std::string filename, int **DataSize, int *nY_G, int numofCov)
{
	std::ifstream listFile(filename);
	if (listFile.fail())
	{
		std::cout << "Can't open filenamelist.txt" << std::endl;
		exit(1);
	}

	const int labelCols = numofCov + 1;
	if (labelCols < 0 || labelCols > 30)
	{
		std::cout << "Invalid number of covariates: " << numofCov << std::endl;
		exit(1);
	}
	// Number of counters the caller provides in nY_G.
	const int tableCount = 1 << labelCols;

	// Collect the data-file names first. Testing getline() itself (instead of
	// eof()) keeps a trailing newline from being counted as an extra data set.
	std::vector<std::string> dataFiles;
	std::string entry;
	while (std::getline(listFile, entry))
	{
		if (!entry.empty() && entry[entry.size() - 1] == '\r')
			entry.erase(entry.size() - 1);	// tolerate CRLF line ends
		if (!entry.empty())
			dataFiles.push_back(entry);
	}
	listFile.close();

	const int ndatasets = static_cast<int>(dataFiles.size());
	if (ndatasets == 0)
	{
		std::cout << "No data file is listed in " << filename << std::endl;
		exit(1);
	}

	*DataSize = (int*)calloc(ndatasets * 2, sizeof(int));
	if (*DataSize == NULL)
	{
		std::cout << "Out of memory while reading " << filename << std::endl;
		exit(1);
	}

	for (int k = 0; k < ndatasets; k++)
	{
		const std::string &dataName = dataFiles[k];
		std::ifstream dataFile(dataName);
		if (dataFile.fail())
		{
			std::cout << "Can't open file " << dataName << std::endl;
			exit(1);
		}

		std::cout << "Start getting data size of file " << dataName << std::endl;

		int nsamples = 0;
		int ndigits = 0;	// digit characters in the first sample line of this file
		std::string line;
		while (std::getline(dataFile, line))
		{
			if (!line.empty() && line[line.size() - 1] == '\r')
				line.erase(line.size() - 1);
			if (line.empty())
				continue;
			nsamples++;

			if (k == 0)
			{
				// The last label character sits at position 2 * numofCov.
				if (labelCols > 0 && line.size() < static_cast<size_t>(2 * labelCols - 1))
				{
					std::cout << "Malformed line " << nsamples << " in file " << dataName << std::endl;
					exit(1);
				}
				int index = 0;
				for (int i = 0; i < labelCols; i++)
				{
					index = index * 2 + (line[2 * i] - '0');
				}
				// Never write outside the caller's counter array.
				if (index < 0 || index >= tableCount)
				{
					std::cout << "Malformed line " << nsamples << " in file " << dataName << std::endl;
					exit(1);
				}
				nY_G[index]++;
			}

			if (nsamples == 1)
			{
				for (size_t i = 0; i < line.size(); i++)
				{
					if (line[i] >= '0' && line[i] <= '9')
						ndigits++;
				}
			}
		}

		(*DataSize)[2 * k] = nsamples;
		(*DataSize)[2 * k + 1] = ndigits - numofCov - 1;
	}

	return ndatasets;
}

// Count, for every SNP and each of the three genotype values, how many set
// bits the bit-packed tables in genoY_G contain.
//
// genoY_G              numofTables arrays of packed words; table j holds
//                      nlongintY_G[j] groups of NumOfGenotype * nsnps words
// nsnps                number of SNPs
// nsamples             not used by this function
// nlongintY_G          number of word groups in each table
// pMarginalDistrSNP    in/out: entry [g * nsnps + snp] is increased by the
//                      count summed over all tables (not cleared here)
// pMarginalDistrSNP_Y  out: entry [(g * numofTables + j) * nsnps + snp] is set
//                      to the count found in table j
// numofTables          number of tables in genoY_G
void CalculateMarginalDistr(uint64** genoY_G, int nsnps, int nsamples, int* nlongintY_G, int* pMarginalDistrSNP, int* pMarginalDistrSNP_Y, int numofTables)
{
	for (int snp = 0; snp < nsnps; ++snp)
	{
		for (int geno = 0; geno < 3; ++geno)
		{
			// Per-table counts for this (genotype, SNP) cell.
			for (int table = 0; table < numofTables; ++table)
			{
				const uint64* words = genoY_G[table];
				const int nwords = nlongintY_G[table];
				int ones = 0;
				for (int w = 0; w < nwords; ++w)
				{
					ones += bitCount(words[w*NumOfGenotype*nsnps + geno*nsnps + snp]);
				}
				pMarginalDistrSNP_Y[(geno*numofTables + table)*nsnps + snp] = ones;
			}

			// Add the per-table counts to the overall marginal.
			for (int table = 0; table < numofTables; ++table)
			{
				pMarginalDistrSNP[geno*nsnps + snp] += pMarginalDistrSNP_Y[(geno*numofTables + table)*nsnps + snp];
			}
		}
	}
}

// Magnitude of a: values below zero are negated, everything else is
// returned unchanged.
double Abs(double a)
{
	if (a < 0)
	{
		return -a;
	}
	return a;
}


int BOOST2(char* filelistname, char* outputfileprefix, int numofCov, float testThreshold, int numofThread)
{
	time_t st, ed, totalst, totalend;
	int *pMarginalDistrSNP;
	int *pMarginalDistrSNP_Y;
	int *DataSize;
	int ndataset;

	int nsamples, nsnps;
	int *nY_G, *nlongintY_G;
	int *GenoJointDistr;

	uint64 mask = 0x0000000000000001;

	int bufferSize = 50000;
	int buferSizeAssociation = 50000;

	ifstream fp, fp_i;
	string filename_i, inputfilename(filelistname);
	string outputfilename(outputfileprefix);
	outputfilename = outputfilename + "InteractionResult.txt";

	uint64 **genoY_G;
	ofstream logfile("log.txt");
	ofstream fout;
	vector<pair<int,int> >interactionPairs;
	vector<double>interactionMeasure;
	double thresholdRecord = testThreshold;
	int numofTables = pow(2, numofCov + 1);

	time(&totalst);


	//open filenamelist
	fp.open(inputfilename);
	if (fp.fail())
	{
		cout << "can't open filenamelist.txt" << endl;
		logfile << "can't open filenamelist.txt" << endl;
		return -1;
	}


	//count bit of '1'
	for (int i = 0; i < 65536; i++)
	{
		wordbits[i] = bitCount(i);
	}

	//Get data size then load data
	cout << "Start loading data..." << endl;
	logfile << "Start loading data..." << endl;
	nY_G = (int*)calloc(pow(2, numofCov + 1), sizeof(int));
	ndataset = GetDataSize(inputfilename, &DataSize, nY_G, numofCov);

	for (int i = 0; i < ndataset; i++)
	{
		nsamples = DataSize[2 * i];
		nsnps = DataSize[2 * i + 1];
	}

	printf("Number of samples: %d\n", nsamples);
	logfile << "Number of samples: " << nsamples << endl;
	printf("Number of SNPs: %d\n", nsnps);
	logfile << "Number of SNPs: " << nsnps << endl;


	nlongintY_G = (int*)calloc(numofTables, sizeof(int));
	for (int i = 0; i < numofTables; i++)
	{
		nlongintY_G[i] = ceil((double)nY_G[i] / LengthLongType);
		//printf("Long int type used:%d \n", nlongintY_G[i]);
	}
	fflush(stdout);

	genoY_G = (uint64**)calloc(numofTables, sizeof(uint64*));
	for (int i = 0; i < numofTables; i++)
	{
		genoY_G[i] = (uint64*)calloc(3 * nsnps*nlongintY_G[i], sizeof(uint64));
	}

	fp.clear();
	fp.seekg(0, ios::beg);

	//LoadData(inputfilename, &genoY_G, &DataSize);

	int ii = 0, visited_snp = 0, cnt_snp = 0;

	time(&st);
	while (!fp.eof())
	{
		ii++;
		vector<int>iY_G(numofTables, 0);
		string tmp_file;
		int row = 0, col = 0;
		getline(fp, tmp_file);
		fp_i.open(tmp_file);
		if (fp_i.fail())
		{
			printf("Can't open input file %s\n", tmp_file.c_str());
			exit(1);
		}

		printf("Loading data in file %s\n", tmp_file.c_str());
		fflush(stdout);

		while (!fp_i.eof())
		{
			string line;
			getline(fp_i, line);

			if (line.size() == 0)continue;

			int index = 0;
			cnt_snp = 0;
			for (int i = 0; i < numofCov + 1; i++)
			{
				index = index * 2 + (line[i * 2] - '0');
			}



			for (int i = (numofCov + 1) * 2; i < line.size(); i++)
			{
				if (line[i]<'0' || line[i]>'2')continue;
				genoY_G[index][((iY_G[index] / LengthLongType) * 3 + (line[i] - '0'))*nsnps + (cnt_snp + visited_snp)] |= (mask << (iY_G[index] % LengthLongType));
				cnt_snp++;
			}

			iY_G[index]++;
		}
		fp_i.close();
		visited_snp += cnt_snp;
	}

	fp.close();
	time(&ed);

	printf("CPU time used in loading data: %d (s)", ed - st);


	fflush(stdout);
	free(DataSize);

	pMarginalDistrSNP = (int*)calloc(numofTables*nsnps, sizeof(int));
	pMarginalDistrSNP_Y = (int*)calloc(numofTables*NumOfGenotype*nsnps, sizeof(int));

	CalculateMarginalDistr(genoY_G, nsnps, nsamples, nlongintY_G, pMarginalDistrSNP, pMarginalDistrSNP_Y, numofTables);


	GenoJointDistr = (int *)calloc(numofTables*NumOfCell, sizeof(int));

	GetInteractionPairs(genoY_G, nsnps, nsamples, numofCov, nlongintY_G, nY_G, pMarginalDistrSNP, pMarginalDistrSNP_Y, wordbits, 65536, interactionPairs, interactionMeasure, thresholdRecord, numofThread
	);
	
	
	fout.open(outputfilename);
	for (int i = 0; i < interactionMeasure.size(); i++)
	{
		fout << "Pair No." << i << ":  " << interactionPairs[i].first << "  " << interactionPairs[i].second << "  " << interactionMeasure[i] << endl;
	}


	//free memory space
	for (int i = 0; i < numofTables; i++)
	{
		free(genoY_G[i]);
	}
	free(genoY_G);
	free(nY_G);
	free(nlongintY_G);
	free(pMarginalDistrSNP);
	free(pMarginalDistrSNP_Y);
	time(&totalend);

	cout << "Total time used: " << totalend - totalst << endl;


	return 0;
}